## Replication Code for "The PhD Pipeline Initiative Works"
## Ryan Brutger, Last modified: 12-1-2022

## This R file contains the code necessary to replicate the analysis, including those in the main text and supplementary appendix.
## All of the following analyses were carried out using R version 4.0.1 on a Macbook Pro with Intel Core i5 processor using MacOS Catalina V. 10.15.7


#load packages for analysis
library(foreign)
library(ggplot2)
library(stargazer)
library(xtable)

# Folder that holds the replication data (change to your directory).
# The path is kept in a variable instead of calling setwd(), so running the
# script does not change the session's working directory.
data_dir <- "ENTER DIRECTORY"

# Load the survey data used by every analysis below.
# The script deliberately does not call rm(list = ls(all = TRUE)): every object
# it uses is created by the script itself, and clearing the workspace would
# delete unrelated objects when this file is sourced in a live session.
pips <- read.csv(file.path(data_dir, "PIPS_Replication_Data.csv"))

# The following provides the coding rules for the variables used in the analysis

# Enrolled -> an indicator variable for whether a student had enrolled AND completed the PIPS program when they took the survey (1=yes, 0=no)
# Enroll_Sem -> 0=includes those who did not receive spots in PIPS or who had not yet enrolled in PIPS, 1=enrolled in spring 2021, 2= enrolled fall 2021 but had not yet completed PIPS 

# firstgen -> 1= those who self identify as "first generation college student", 0=otherwise
# For the race and ethnicity variables, each is an indicator for whether the respondent self-identified with the race or ethnicity (they could choose more than one)
# The options were: American Indian or Alaska Native; Asian or Asian American; Black or African American; Hispanic, Latino, Latina, LatinX;
# Middle Eastern or Northern African; Native Hawaiian or Other Pacific Islander; White; Another option (please specify); Prefer not to say

# Dependent Variables:
#PhD_interest: 4=very likely, 3=somewhat likely, 2=somewhat unlikely, 1=very unlikely
#Prep_app: 4=very prepared, 3=somewhat prepared, 2=not very prepared, 1=not prepared at all
#Prep_diversity: 4=very prepared, 3=somewhat prepared, 2=not very prepared, 1=not prepared at all
#Prep_research: 4=very prepared, 3=somewhat prepared, 2=not very prepared, 1=not prepared at all
#Prep_letters: 4=very prepared, 3=somewhat prepared, 2=not very prepared, 1=not prepared at all

# For PhD_interest2, Prep_app2, Prep_diversity2, Prep_research2, Prep_letters2 the earlier measures are dichotomized
# If the earlier measure was a 3 or 4, then the dichotomous measure = 1; otherwise it equals 0


# Response rate calculations reported on page 5
# There were 85 students in the lottery who could have taken the pre-PIPS survey.
# Respondents are counted with sum(..., na.rm = TRUE) so that a missing Enrolled
# value is ignored; length(x[x == v]) would count every NA as a match.
n_pre_pips <- sum(pips$Enrolled == 0, na.rm = TRUE)
n_pre_pips # 42
n_pre_pips / 85 # 0.49

# At the time of this analysis 38 students had completed PIPS (two dropped during the semester);
# the Spring 2022 enrolled students were in progress and had not completed PIPS yet.
n_post_pips <- sum(pips$Enrolled == 1, na.rm = TRUE)
n_post_pips # 20 out of 38 students who had completed PIPS also completed the follow-up survey
n_post_pips / 38 # 0.53

# Demographics reported on page 6
# First-generation status: tally each answer, leaving out missing responses
sum(pips$firstgen == 1, na.rm = TRUE) # 38 first generation
sum(pips$firstgen == 0, na.rm = TRUE) # 19 not first generation
38 / (38 + 19) # 67% first generation

# Race/Ethnicity
# Each line totals one self-identification indicator column; the trailing
# comment records the total obtained for the paper.
# sum() is called without na.rm, so a missing value in a column would make
# that column's total NA.
# 11 identify as multiple races/ethnicities, which were counted manually
sum(pips$hispanic) #25
sum(pips$white) #16
sum(pips$middleE) #8
sum(pips$black) #4
sum(pips$native) #1
sum(pips$islander) #0

# Gender (not reported in paper): totals of the gender indicator columns
sum(pips$male) #22
sum(pips$female) #31
sum(pips$non.binary) #3

# Table 1 of the paper: each dichotomized outcome is regressed on the
# Enrolled indicator with lm(), using the full survey sample.
phd.interest2 <- lm(formula = PhD_interest2 ~ Enrolled, data = pips)
prep.app2 <- lm(formula = Prep_app2 ~ Enrolled, data = pips)
prep.diversity2 <- lm(formula = Prep_diversity2 ~ Enrolled, data = pips)
prep.research2 <- lm(formula = Prep_research2 ~ Enrolled, data = pips)
prep.letters2 <- lm(formula = Prep_letters2 ~ Enrolled, data = pips)

# Print the five models side by side, leaving out the R-squared, adjusted
# R-squared, residual standard error, and F-statistic rows.
stargazer(
  phd.interest2, prep.app2, prep.diversity2, prep.research2, prep.letters2,
  column.labels = c(
    "PhD Interest",
    "Prepared \n to Apply",
    "Prepared \n Personal Statement",
    "Prepared \n  SOP",
    "Prepared \n LORs"
  ),
  omit.stat = c("rsq", "adj.rsq", "ser", "f")
)


# Appendix Table 3 (section 4): same specification as Table 1, but with the
# original 1-4 outcome scales in place of the dichotomized versions.
phd.interest <- lm(formula = PhD_interest ~ Enrolled, data = pips)
prep.app <- lm(formula = Prep_app ~ Enrolled, data = pips)
prep.diversity <- lm(formula = Prep_diversity ~ Enrolled, data = pips)
prep.research <- lm(formula = Prep_research ~ Enrolled, data = pips)
prep.letters <- lm(formula = Prep_letters ~ Enrolled, data = pips)

# Print the five models side by side, leaving out the R-squared, adjusted
# R-squared, residual standard error, and F-statistic rows.
stargazer(
  phd.interest, prep.app, prep.diversity, prep.research, prep.letters,
  column.labels = c(
    "PhD Interest",
    "Prepared \n to Apply",
    "Prepared \n Personal Statement",
    "Prepared \n  SOP",
    "Prepared \n LORs"
  ),
  omit.stat = c("rsq", "adj.rsq", "ser", "f")
)

# Appendix Table 4 (section 5): the dichotomized outcomes regressed on
# Enrolled plus the male, white, and firstgen indicators as controls.
phd.interest2b <- lm(
  formula = PhD_interest2 ~ Enrolled + male + white + firstgen,
  data = pips
)
prep.app2b <- lm(
  formula = Prep_app2 ~ Enrolled + male + white + firstgen,
  data = pips
)
prep.diversity2b <- lm(
  formula = Prep_diversity2 ~ Enrolled + male + white + firstgen,
  data = pips
)
prep.research2b <- lm(
  formula = Prep_research2 ~ Enrolled + male + white + firstgen,
  data = pips
)
prep.letters2b <- lm(
  formula = Prep_letters2 ~ Enrolled + male + white + firstgen,
  data = pips
)

# Print the five models side by side, leaving out the R-squared, adjusted
# R-squared, residual standard error, and F-statistic rows.
stargazer(
  phd.interest2b, prep.app2b, prep.diversity2b, prep.research2b, prep.letters2b,
  column.labels = c(
    "PhD Interest",
    "Prepared \n to Apply",
    "Prepared \n Personal Statement",
    "Prepared \n  SOP",
    "Prepared \n LORs"
  ),
  omit.stat = c("rsq", "adj.rsq", "ser", "f")
)

# Generate Appendix Table 5 of section 6:
# Same as Table 1, but limits sample to only those who eventually enrolled in PIPS,
# comparing those who completed to those who had not yet started.
# The row condition is passed to subset() directly: subset() treats a missing
# Enroll_Sem as "do not keep", whereas bracket indexing with an NA condition
# would carry such respondents along as all-NA rows.
pipsEnrolled <- subset(pips, Enroll_Sem > 0)

# Each dichotomized outcome regressed on the Enrolled indicator within the
# restricted sample.
phd.interest2c <- lm(PhD_interest2 ~ Enrolled, data = pipsEnrolled)
prep.app2c <- lm(Prep_app2 ~ Enrolled, data = pipsEnrolled)
prep.diversity2c <- lm(Prep_diversity2 ~ Enrolled, data = pipsEnrolled)
prep.research2c <- lm(Prep_research2 ~ Enrolled, data = pipsEnrolled)
prep.letters2c <- lm(Prep_letters2 ~ Enrolled, data = pipsEnrolled)

# Print the five models side by side, leaving out the R-squared, adjusted
# R-squared, residual standard error, and F-statistic rows.
stargazer(phd.interest2c, prep.app2c, prep.diversity2c, prep.research2c, prep.letters2c,
          omit.stat = c("rsq", "adj.rsq", "ser", "f"),
          column.labels = c("PhD Interest", "Prepared \n to Apply", "Prepared \n Personal Statement", "Prepared \n  SOP", "Prepared \n LORs"))

